version 18.0               // interpret this do-file under Stata 18.0 syntax/behavior, even if run in a later release
set processors 8           // fix the number of cores Stata/MP uses, so results do not depend on the host's core count
                           // NOTE(review): confirm this does not error on machines/licenses with fewer than 8 cores
clear all                  // drop the data in memory together with other stored objects (labels, matrices, results, ...)
macro drop _all            // also drop all macros, so nothing carries over from an earlier session
set seed 20220613          // fix the random-number seed; no random draws appear in this script as shown

*-------------------------------------------------------
* project: daylight saving time (dst)
*-------------------------------------------------------

* Run metadata. `pgm' is the base name of this do-file and is reused below
* for the log file name and for the saved data set name.
local pgm  "dst-data01_acs_pop_and_housing_basics_clean"  // file name
local who  "Muzhe Yang"                                   // author
local dte  "2022-06-13"                                   // created date
local dte2 "`c(current_date)'"                            // last run date
local tag  "`pgm'.do, created by `who' on `dte', last run on `dte2'"

* Close any log left open by an earlier (possibly failed) run, then start a
* fresh plain-text log. The path is relative, so the working directory must
* be the folder that contains code/.
* Forward slashes are used as the directory separator: Stata accepts them on
* every platform (including Windows), and they avoid the problem that a
* backslash placed directly before a macro reference suppresses its expansion.
capture log close
log using "code/management/`pgm'.txt", replace text
display "`tag'"

* Load the raw ACS workbook; the first row of the sheet supplies the variable
* names. The path is relative to the working directory and uses forward
* slashes, which Stata accepts on Windows, macOS, and Linux alike.
import excel "data_raw/acs/acs_pop_and_housing_basics.xlsx", sheet("acs_pop_and_housing_basics") firstrow clear

* Create double-precision analysis variables from the raw ACS columns.
* The list alternates <new variable> <source column>; the loop below peels
* off one pair at a time, so variables are created in the order listed.
local acs_map ///
    pop                 B01001_001E              ///
    pop_under_18yr      B01001_calc_numLT18E     ///
    pop_under_18yr_pct  B01001_calc_pctLT18E     ///
    pop_65yr_over       B01001_calc_numGE65E     ///
    pop_65yr_over_pct   B01001_calc_pctGE65E     ///
    age_median          B01002_001E              ///
    white               B03002_003E              ///
    black               B03002_004E              ///
    hispanic            B03002_012E              ///
    white_pct           B03002_calc_pctNHWhiteE  ///
    black_pct           B03002_calc_pctBlackE    ///
    hispanic_pct        B03002_calc_pctHispLatE  ///
    hh_income_median    B19049_001E              ///
    home_value_median   B25077_001E

while "`acs_map'" != "" {
	gettoken newvar acs_map : acs_map    // next target name
	gettoken srcvar acs_map : acs_map    // its raw source column
	generate double `newvar' = `srcvar'
}

* Copy the three raw area columns into double-precision variables and convert
* them from square meters to square miles.
gen double area_land = ALAND
gen double area_water = AWATER
gen double area_shape = Shape__Area

* Exact conversion factor: (1609.344 meters per international mile)^2.
local sqm_per_sqmi 2589988.110336
foreach v of varlist area_land area_water area_shape {
	replace `v' = `v'/`sqm_per_sqmi'
}
/* 
Notes: 
1) The area variables in the original data are measured in square meters, and we convert them to be measured in square miles. 
   NOTE(review): this is assumed to hold for Shape__Area as well as ALAND/AWATER -- confirm against the source layer.
2) 1 international mile = 1609.344 meters exactly, so 1 square mile = 1609.344^2 = 2589988.110336 square meters.
3) Online converters such as https://www.unitconverters.net/ display this factor rounded to 2589990; 
   the exact value is used here so that the converted areas do not carry that rounding error.
*/

* Attach descriptive labels to the analysis variables, grouped by topic.

* -- population counts and age structure
label var pop                "total population"
label var pop_under_18yr     "population less than 18 years"
label var pop_under_18yr_pct "% of population less than 18 years"
label var pop_65yr_over      "population 65 years and over"
label var pop_65yr_over_pct  "% of population 65 years and over"
label var age_median         "median age of the total population"

* -- race / ethnicity: counts, then shares
label var white              "white alone, not hispanic or latino"
label var black              "black alone, not hispanic or latino"
label var hispanic           "hispanic or latino"
label var white_pct          "% of pop. that is white alone, not hispanic or latino"
label var black_pct          "% of pop. that is black alone, not hispanic or latino"
label var hispanic_pct       "% of pop. that is hispanic or latino"

* -- income and housing
label var hh_income_median   "median household income in the past 12 months"
label var home_value_median  "median home value (for owner-occupied housing units)"

* -- geography
label var area_land          "area of land (measured in square miles)"
label var area_water         "area of water (measured in square miles)"
label var area_shape         "area of shape (measured in square miles)"

* Analysis variables, listed once in the column order wanted in the saved
* file. The same list drives both -order- and -keep-, so the two cannot
* drift apart when a variable is added or removed.
local analysis_vars ///
    pop white black hispanic white_pct black_pct hispanic_pct ///
    pop_under_18yr pop_65yr_over pop_under_18yr_pct pop_65yr_over_pct ///
    age_median hh_income_median home_value_median ///
    area_land area_water area_shape

* Place the analysis variables directly after County, then drop every column
* other than the three identifiers and the analysis variables.
order `analysis_vars', after(County)
keep GEOID State County `analysis_vars'

* Print a compact summary to the log, shrink storage types where this loses
* no information, and save. The output path is relative to the working
* directory and uses forward slashes, which Stata accepts on all platforms
* and which do not interfere with expansion of the `pgm' macro.
codebook, compact
compress
save "data_clean/acs/`pgm'.dta", replace

* Close the log and stop processing this do-file.
log close
exit